library(tidyverse)
library(lubridate)
library(janitor)
library(here)
library(hrbrthemes)
library(kableExtra)
library(DT)
library(skimr)
library(outbreaks)
library(incidence2)
library(epicontacts)
library(epitrix)
theme_set(theme_minimal())
measles_data <- measles_hagelloch_1861 %>%
tibble()
skim(measles_data)
Data summary
|
|
|
|
Name
|
measles_data
|
|
Number of rows
|
188
|
|
Number of columns
|
12
|
|
_______________________
|
|
|
Column type frequency:
|
|
|
Date
|
3
|
|
factor
|
3
|
|
numeric
|
6
|
|
________________________
|
|
|
Group variables
|
None
|
Variable type: Date
|
skim_variable
|
n_missing
|
complete_rate
|
min
|
max
|
median
|
n_unique
|
|
date_of_prodrome
|
0
|
1.00
|
1861-10-30
|
1862-01-24
|
1861-12-01
|
36
|
|
date_of_rash
|
0
|
1.00
|
1861-11-03
|
1862-01-27
|
1861-12-05
|
35
|
|
date_of_death
|
176
|
0.06
|
1861-11-18
|
1861-12-28
|
1861-12-13
|
8
|
Variable type: factor
|
skim_variable
|
n_missing
|
complete_rate
|
ordered
|
n_unique
|
top_counts
|
|
gender
|
11
|
0.94
|
FALSE
|
2
|
m: 94, f: 83
|
|
class
|
0
|
1.00
|
FALSE
|
3
|
0: 90, 2: 68, 1: 30
|
|
complications
|
0
|
1.00
|
FALSE
|
1
|
yes: 188
|
Variable type: numeric
|
skim_variable
|
n_missing
|
complete_rate
|
mean
|
sd
|
p0
|
p25
|
p50
|
p75
|
p100
|
hist
|
|
case_ID
|
0
|
1.00
|
94.50
|
54.42
|
1.0
|
47.75
|
94.5
|
141.25
|
188
|
▇▇▇▇▇
|
|
infector
|
4
|
0.98
|
82.15
|
64.20
|
1.0
|
31.00
|
50.5
|
153.00
|
188
|
▇▇▃▂▇
|
|
age
|
0
|
1.00
|
6.99
|
4.30
|
0.0
|
3.00
|
7.0
|
11.00
|
15
|
▇▆▃▇▃
|
|
family_ID
|
0
|
1.00
|
30.95
|
16.59
|
1.0
|
18.00
|
31.0
|
43.00
|
69
|
▅▇▇▆▂
|
|
x_loc
|
0
|
1.00
|
188.22
|
59.09
|
7.5
|
145.00
|
182.5
|
240.00
|
280
|
▁▂▇▆▇
|
|
y_loc
|
0
|
1.00
|
133.78
|
67.04
|
5.0
|
80.00
|
147.5
|
187.50
|
240
|
▃▃▃▇▅
|
Building an Epicurve
Incidence
raw_incidence <- incidence(
measles_data,
date_of_prodrome,
interval = "week",
groups = c(gender, class)
)
facet_plot(
raw_incidence,
fill = gender,
facets = class,
show_cases = TRUE
) +
scale_fill_viridis_d(na.value = "black") +
labs(
title = "Epicurve of the Hagelloch 1861 measles outbreak",
subtitle = "Stratified by class and gender",
x = "Date of onset",
y = "Number of cases (daily)"
)
## Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.

Cumulative Incidence
raw_incidence %>%
cumulate() %>%
facet_plot(
fill = gender,
facets = class
# show_cases = TRUE
) +
scale_fill_viridis_d(na.value = "black") +
labs(
title = "Cumulative Epicurve of the Hagelloch 1861 measles outbreak",
subtitle = "Stratified by class",
x = "Date of onset",
y = "Number of cases (daily)"
)
## Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.

Estimating delays
measles_data <- measles_data %>%
mutate(delay_rash = as.integer(date_of_rash - date_of_prodrome))
ggplot(measles_data, aes(x = delay_rash)) +
geom_bar(fill = "#d15656") +
labs(
title = "Empirical distribution of delays from onset to rash",
x = "Days from onset of symptoms to rash",
y = "Number of cases"
)

delay_rash_fit <- measles_data %>%
pull(delay_rash) %>%
epitrix::fit_disc_gamma()
delay_rash_fit
## $mu
## [1] 4.446486
##
## $cv
## [1] 0.3785482
##
## $sd
## [1] 1.68321
##
## $ll
## [1] -360.0598
##
## $converged
## [1] TRUE
##
## $distribution
## A discrete distribution
## name: gamma
## parameters:
## shape: 6.97842734893307
## scale: 0.637176003492939
delay_rash_comparison <- measles_data %>%
count(delay_rash) %>%
complete(delay_rash = 0:15, fill = list(n = 0)) %>%
mutate(
prop = n / sum(n),
prop_dist = delay_rash_fit$distribution$d(delay_rash)
)
ggplot(delay_rash_comparison, aes(x = delay_rash)) +
geom_col(aes(y = prop), fill = "#d15656") +
geom_line(aes(y = prop_dist)) +
labs(
title = "Empirical vs Esimated distribution of delays from onset to rash",
x = "Days from onset of symptoms to rash",
y = "Proportion/probability"
)

Estimating severity
measles_data <- measles_data %>%
mutate(outcome = if_else(is.na(date_of_death), "recovered", "dead"))
tabyl(measles_data$outcome)
## measles_data$outcome n percent
## dead 12 0.06382979
## recovered 176 0.93617021
Visualising transmission chains
measles_contacts <- measles_data %>%
select(infector, case_ID)
measles_chains <- make_epicontacts(
linelist = measles_data,
contacts = measles_contacts,
id = "case_ID",
from = "infector",
to = "case_ID",
directed = TRUE
)
measles_chains
##
## /// Epidemiological Contacts //
##
## // class: epicontacts
## // 188 cases in linelist; 188 contacts; directed
##
## // linelist
##
## tibble [188, 14]
## id int 1 2 3 4 5 6
## infector int 45 45 172 180 45 180
## date_of_prodrome date 1861-11-21 1861-11-23 1861-11-28 1861-11-27 1861-11-22 1861-11-26
## date_of_rash date 1861-11-25 1861-11-27 1861-12-02 1861-11-28 1861-11-27 1861-11-29
## date_of_death date NA NA NA NA NA NA
## age dbl 7 6 4 13 8 12
## gender fct f f f m f m
## family_ID int 41 41 41 61 42 42
## class fct 1 1 0 2 1 2
## complications fct yes yes yes yes yes yes
## x_loc dbl 142.5 142.5 142.5 165 145 145
## y_loc dbl 100 100 100 102.5 120 120
## delay_rash int 4 4 4 1 5 3
## outcome chr recovered recovered recovered recovered recovered recovered
##
## // contacts
##
## tibble [188, 2]
## from int 45 45 172 180 45 180
## to int 1 2 3 4 5 6
plot(measles_chains)
all_ids <- measles_chains %>%
get_id(which = "all") %>%
na.omit()
measles_data %>%
count(infector) %>%
complete(
infector = all_ids,
fill = list(n = 0)
) %>%
ggplot(aes(x = n)) +
geom_bar(fill = "#18415a") +
labs(
title = "Distribution of case reproduction numbers",
x = "Number of secondary cases",
y = "Frequency"
)

measles_chains %>%
vis_epicontacts(
node_color = "family_ID",
node_shape = "gender",
shapes = c("m" = "male", "f" = "female")
)
Estimating the serial interval
measles_data <- measles_data %>%
mutate(
serial_interval = as.integer(date_of_prodrome - date_of_prodrome[infector])
)
# Could also use the following code to estimate the serial interval
get_pairwise(measles_chains, "date_of_prodrome")
## [1] 10 12 9 10 11 9 9 10 11 10 10 9 10 9 13 8 7 11 7 9 10 10 10 9 9 11 9 8 9 14 10 10 13 8 10 10 10 9 12 11 10 8 11 11 12 10 13 10 8 11 10 9 10 12 11 11 9 11 8 10 9 7 9 10 10 12 7 12 11 11 10 11 10 10 8 11 10 9 9 8 8 11 9
## [84] 8 10 12 8 8 12 10 11 12 11 9 13 9 10 10 9 9 10 10 8 12 14 8 10 11 15 11 10 10 13 9 10 10 11 11 10 12 11 12 9 11 13 10 14 9 11 8 13 11 12 9 9 9 10 10 10 9 NA 12 12 13 12 12 10 11 12 8 13 12 13 9 12 11 12 12 14 12 9 9 10 10 12 10
## [167] 10 8 10 13 11 11 NA NA 11 10 8 8 11 10 14 16 12 NA 11 11 15 11
serial_interval_fit <- measles_data %>%
pull(serial_interval) %>%
epitrix::fit_disc_gamma()
## Warning in epitrix::fit_disc_gamma(.): 4 NAs were removed from the data before fitting.
serial_interval_fit
## $mu
## [1] 10.89177
##
## $cv
## [1] 0.1500888
##
## $sd
## [1] 1.634733
##
## $ll
## [1] -352.9525
##
## $converged
## [1] TRUE
##
## $distribution
## A discrete distribution
## name: gamma
## parameters:
## shape: 44.3918886018654
## scale: 0.245355063017598
serial_interval_comparison <- measles_data %>%
count(serial_interval) %>%
complete(serial_interval = 0:20, fill = list(n = 0)) %>%
mutate(
prop = n / sum(n),
prop_dist = serial_interval_fit$distribution$d(serial_interval)
)
ggplot(serial_interval_comparison, aes(x = serial_interval)) +
geom_col(aes(y = prop), fill = "#2b4885") +
geom_line(aes(y = prop_dist)) +
labs(
title = "Empirical vs Esimated distribution of delays from onset to rash",
x = "Days from onset of symptoms to rash",
y = "Proportion/probability"
)
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 row(s) containing missing values (geom_path).

Transmission chains: further investigations
get_pairwise(measles_chains, "gender") %>%
tabyl()
## . n percent valid_percent
## f -> f 37 0.1968085 0.2228916
## f -> m 38 0.2021277 0.2289157
## m -> f 40 0.2127660 0.2409639
## m -> m 51 0.2712766 0.3072289
## <NA> 22 0.1170213 NA
get_pairwise(measles_chains, "gender", f = table) %>%
chisq.test()
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: .
## X-squared = 0.28626, df = 1, p-value = 0.5926
get_pairwise(measles_chains, "class", f = table) %>%
as_tibble() %>%
rename(infector = "values.from", infectee = "values.to") %>%
ggplot(aes(x = infectee, y = infector)) +
geom_tile(aes(fill = n)) +
scale_fill_viridis_c() +
labs(title = "Tranmission patterns by class")

get_pairwise(measles_chains, "age", f = table) %>%
as_tibble() %>%
rename(infector = "values.from", infectee = "values.to") %>%
ggplot(aes(x = infectee, y = infector)) +
geom_tile(aes(fill = n)) +
scale_fill_viridis_c() +
labs(title = "Tranmission patterns by age")

# Indicate if person infected by someone in their family
prop_same <- function(a, b) {
mean(a == b, na.rm = TRUE)
}
# Indicate if person infected by someone in their family
prop_same_perm <- function(a, b) {
perm_a <- sample(a, replace = FALSE)
mean(perm_a == b, na.rm = TRUE)
}
prop_same_fam <- measles_chains %>%
get_pairwise(attribute = "family_ID",
f = prop_same)
prop_same_fam_ref <- purrr::map_dbl(
1:999,
function(.x) {
get_pairwise(measles_chains,
attribute = "family_ID",
f = prop_same_perm)
}
)
prop_same_fam / mean(prop_same_fam_ref)
## [1] 21.88421
## [1] 21.39722
qplot(prop_same_fam_ref, geom = "histogram") +
theme_bw() +
geom_vline(xintercept = prop_same_fam, color = "#E03F57", size = 1) +
labs(x = "Proportion of transmission within the same family",
y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
